# library import
from utils import *
# RM2 classified consensi: parse the FASTA, then hand the parsed records to
# the length-histogram and summary-statistics helpers.
# NOTE(review): cons_parser, length_hist and Stats come from utils; what they
# return/plot is not visible here - confirm against utils.
rm2_consensi_path = "data/consensi/RM2_consensi.fa.classified"
seq_dict = cons_parser(rm2_consensi_path)
length_hist(seq_dict)
Stats(seq_dict, classification=True)
| Number of consensi: | 15384 |
| Longest sequence: | 14783 |
| Shortest sequence: | 29 |
| Average length: | 2262.0 |
| LTR | 12565 |
| tRNA | 14 |
| LINE | 513 |
| DNA | 147 |
| RC | 34 |
| SINE | 22 |
| rRNA | 8 |
| Satellite | 5 |
| Unknown | 2069 |
| Simple_repeat | 7 |
# Run reads_hist on the RM2 check alignment (SAM) and the consensus FASTA it
# refers to.
# NOTE(review): reads_hist is defined in utils; presumably it plots reads per
# consensus - confirm.
rm2_sam_path = "data/bam/RM2_consensi_check.sam"
rm2_fasta_path = "data/consensi/RM2_consensi.fa.classified"
reads_hist(rm2_sam_path, rm2_fasta_path)
# EDTA consensi: parse the FASTA, then hand the parsed records to the
# length-histogram and summary-statistics helpers.
# NOTE(review): cons_parser, length_hist and Stats come from utils; what they
# return/plot is not visible here - confirm against utils.
seq_dict = cons_parser("data/consensi/EDTA_consensi.fa")
length_hist(seq_dict)
# Pass the flag by keyword so the bare boolean is self-explanatory at the
# call site.
Stats(seq_dict, classification=True)
| Number of consensi: | 16191 |
| Longest sequence: | 16685 |
| Shortest sequence: | 80 |
| Average length: | 1972.0 |
| DNA | 4725 |
| LTR | 10166 |
| MITE | 1300 |
# Run reads_hist on the EDTA check alignment (SAM) and the consensus FASTA it
# refers to.
# NOTE(review): reads_hist is defined in utils; presumably it plots reads per
# consensus - confirm.
edta_sam_path = "data/bam/EDTA_consensi_check.sam"
edta_fasta_path = "data/consensi/EDTA_consensi.fa"
reads_hist(edta_sam_path, edta_fasta_path)
# Length of every record in the MITE consensus FASTA, in file order.
seq_lens = [
    len(seq_record.seq)
    for seq_record in SeqIO.parse("data/consensi/MITE_consensi.fa", "fasta")
]

# Histogram of the consensus sequence lengths.
trace = go.Histogram(x=seq_lens)
fig = go.Figure(
    data=trace,
    layout=go.Layout(
        title="MITE-Tracker",
        xaxis=dict(title="sequence length"),
        yaxis=dict(title="count"),
    ),
)
fig.show()

# Only a list of lengths is available here, so the classification flag is
# off; it is passed by keyword to make the bare boolean self-explanatory.
Stats(seq_lens, classification=False)
| Number of consensi: | 10863 |
| Longest sequence: | 800 |
| Shortest sequence: | 49 |
| Average length: | 289.0 |
# Run reads_hist on the MITE check alignment (SAM) and the consensus FASTA it
# refers to.
# NOTE(review): reads_hist is defined in utils; presumably it plots reads per
# consensus - confirm.
mite_sam_path = "data/bam/MITE_consensi_check.sam"
mite_fasta_path = "data/consensi/MITE_consensi.fa"
reads_hist(mite_sam_path, mite_fasta_path)
# Length of every record in the RepeatMasker consensus FASTA, in file order.
seq_lens = [
    len(seq_record.seq)
    for seq_record in SeqIO.parse("data/consensi/RM_consensi.fa", "fasta")
]

# Histogram of the consensus sequence lengths.
trace = go.Histogram(x=seq_lens)
fig = go.Figure(
    data=trace,
    layout=go.Layout(
        title="RepeatMasker",
        xaxis=dict(title="sequence length"),
        yaxis=dict(title="count"),
    ),
)
fig.show()

# Only a list of lengths is available here, so the classification flag is
# off; it is passed by keyword to make the bare boolean self-explanatory.
Stats(seq_lens, classification=False)
| Number of consensi: | 12214 |
| Longest sequence: | 43821 |
| Shortest sequence: | 30 |
| Average length: | 2197.0 |
# Shell step (not Python): write a trimmed copy of RM_consensi.fa.
#   /@/,+1d    delete every line containing '@' and the line right after it
#   /SAT/,+1d  delete every line containing 'SAT' and the line right after it
#   /^\s*$/d   delete blank / whitespace-only lines
# The "addr,+N" range form is a GNU sed extension.
# NOTE(review): this removes whole FASTA records only if each sequence sits
# on a single line after its header - confirm for this file.
# NOTE(review): paths are relative to the current directory, while the
# notebook reads data/consensi/RM_consensi.fa.trimmed - presumably this was
# run from data/consensi; confirm.
sed -e '/@/,+1d;/SAT/,+1d;/^\s*$/d' RM_consensi.fa > RM_consensi.fa.trimmed
# Lengths of every record in the trimmed RepeatMasker consensus FASTA.
# Records with an empty sequence are still counted, and their ids are
# printed so they can be inspected.
seq_lens = []
for seq_record in SeqIO.parse("data/consensi/RM_consensi.fa.trimmed", "fasta"):
    # Measure once and reuse for both the list and the emptiness check.
    seq_len = len(seq_record.seq)
    seq_lens.append(seq_len)
    if seq_len == 0:
        print(seq_record.id)

# Histogram of the consensus sequence lengths.
trace = go.Histogram(x=seq_lens)
fig = go.Figure(
    data=trace,
    layout=go.Layout(
        title="RepeatMasker",
        xaxis=dict(title="sequence length"),
        yaxis=dict(title="count"),
    ),
)
fig.show()

# Only a list of lengths is available here, so the classification flag is
# off; it is passed by keyword to make the bare boolean self-explanatory.
Stats(seq_lens, classification=False)
| Number of consensi: | 12120 |
| Longest sequence: | 43821 |
| Shortest sequence: | 30 |
| Average length: | 2213.0 |